pacman::p_load(spdep, maps, spData, spatstat, maptools)
Load Data
data1 <- read_csv("/home/leonardr/Spatial Data NBA/Data/shots1.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## name = col_character(),
## team_name = col_character(),
## game_date = col_date(format = ""),
## seconds_remaining = col_character(),
## action_type = col_character(),
## shot_type = col_character(),
## opponent = col_character(),
## defender_name = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
data2 <- read_csv("/home/leonardr/Spatial Data NBA/Data/shots2.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## name = col_character(),
## team_name = col_character(),
## game_date = col_date(format = ""),
## seconds_remaining = col_character(),
## action_type = col_character(),
## shot_type = col_character(),
## opponent = col_character(),
## defender_name = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
data3 <- read_csv("/home/leonardr/Spatial Data NBA/Data/shots3.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## name = col_character(),
## team_name = col_character(),
## game_date = col_date(format = ""),
## seconds_remaining = col_character(),
## action_type = col_character(),
## shot_type = col_character(),
## opponent = col_character(),
## defender_name = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
data4 <- read_csv("/home/leonardr/Spatial Data NBA/Data/shots4.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## name = col_character(),
## team_name = col_character(),
## game_date = col_date(format = ""),
## seconds_remaining = col_character(),
## action_type = col_character(),
## shot_type = col_character(),
## opponent = col_character(),
## defender_name = col_logical()
## )
## ℹ Use `spec()` for the full column specifications.
data5 <- read_csv("/home/leonardr/Spatial Data NBA/Data/shots5.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## name = col_character(),
## team_name = col_character(),
## game_date = col_date(format = ""),
## seconds_remaining = col_character(),
## action_type = col_character(),
## shot_type = col_character(),
## opponent = col_character(),
## defender_name = col_logical()
## )
## ℹ Use `spec()` for the full column specifications.
data6 <- read_csv("/home/leonardr/Spatial Data NBA/Data/shots6.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## name = col_character(),
## team_name = col_character(),
## game_date = col_date(format = ""),
## seconds_remaining = col_character(),
## action_type = col_character(),
## shot_type = col_character(),
## opponent = col_character(),
## defender_name = col_logical()
## )
## ℹ Use `spec()` for the full column specifications.
data <- rbind(data1, data2, data3, data4, data5, data6)
data <- data %>%
dplyr::select(x, y, name, team_name, period, minutes_remaining,
seconds_remaining, shot_made_flag, shot_distance,
dribbles, touch_time, defender_distance, shot_clock)
lillard <- read_csv("/home/leonardr/Spatial Data NBA/Data/lillard.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## name = col_character(),
## team_name = col_character(),
## game_date = col_date(format = ""),
## seconds_remaining = col_character(),
## action_type = col_character(),
## shot_type = col_character(),
## opponent = col_character(),
## defender_name = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
gobert <- read_csv("/home/leonardr/Spatial Data NBA/Data/gobert.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## name = col_character(),
## team_name = col_character(),
## game_date = col_date(format = ""),
## seconds_remaining = col_character(),
## action_type = col_character(),
## shot_type = col_character(),
## opponent = col_character(),
## defender_name = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
Wrangling
data <- data %>%
filter(shot_distance < 47) %>%
mutate(shot_outcome = as_factor(shot_made_flag))
lillard <- lillard %>%
filter(shot_distance < 47) %>%
mutate(shot_outcome = as_factor(shot_made_flag)) %>%
drop_na()
gobert <- gobert %>%
filter(shot_distance < 47) %>%
mutate(shot_outcome = as_factor(shot_made_flag)) %>%
drop_na()
Lillard
ggplot(lillard,
aes(x = x,
y = y)) +
geom_jitter(alpha = 0.5, aes(color = shot_outcome)) +
scale_color_manual(name = " ",
labels = c("Miss", "Make"),
values = c("0" = "palevioletred2", "1" = "deepskyblue")) +
ggtitle("Damian Lillard Shot Chart") +
theme_classic()

Density
ggplot(lillard,
aes(x = x,
y = y)) +
stat_density_2d(aes(fill = ..density..), geom = "raster", contour = FALSE) +
scale_fill_distiller(palette = "Spectral", direction = -1) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
theme(legend.position='none') +
ggtitle("Damian Lillard Density Plot") +
theme_classic()

Gobert
ggplot(gobert,
aes(x = x,
y = y)) +
geom_jitter(alpha = 0.5, aes(color = shot_outcome)) +
scale_color_manual(name = " ",
labels = c("Miss", "Make"),
values = c("0" = "palevioletred2", "1" = "deepskyblue")) +
ggtitle("Rudy Gobert Shot Chart") +
theme_classic()

Density
ggplot(gobert,
aes(x = x,
y = y)) +
stat_density_2d(aes(fill = ..density..), geom = "raster", contour = FALSE) +
scale_fill_distiller(palette = "Spectral", direction = -1) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
theme(legend.position='none') +
coord_cartesian(xlim =c(-100, 100)) +
ggtitle("Rudy Gobert Density Plot") +
theme_classic()

Summary Data
summary_data <- data %>%
group_by(name) %>%
summarize(avg_dist = mean(shot_distance),
avg_dribbles = mean(dribbles),
avg_touch_time = mean(touch_time),
avg_defender = mean(defender_distance),
avg_clock = mean(shot_clock),
fg = sum(shot_made_flag == 1),
fga = (sum(shot_made_flag == 0)) + sum(shot_made_flag == 1),
pct = fg/fga)
summary_data <- summary_data %>%
filter(fga >= 100)
Scatterplots
# Average Distance vs. FG %
ggplot(summary_data,
aes(x = avg_dist,
y = pct)) +
geom_smooth(method = lm,
color = "grey60",
alpha = 0.3) +
geom_point(size = 2, alpha = 0.5, color = "darkorange") +
labs(x = "Average Shot Distance (ft)",
y = "Field Goal %") +
theme_classic()
## `geom_smooth()` using formula 'y ~ x'

cor(summary_data$avg_dist,
summary_data$pct)
## [1] -0.7265144
# Dribbles vs. FG %
ggplot(summary_data,
aes(x = avg_dribbles,
y = pct)) +
geom_smooth(method = lm,
color = "grey60",
alpha = 0.3) +
geom_point(size = 2, alpha = 0.5, color = "darkorange") +
labs(x = "Average Dribbles",
y = "Field Goal %") +
theme_classic()
## `geom_smooth()` using formula 'y ~ x'

cor(summary_data$avg_dribbles,
summary_data$pct)
## [1] -0.3046269
# Touch Time vs. FG %
ggplot(summary_data,
aes(x = avg_touch_time,
y = pct)) +
geom_smooth(method = lm,
color = "grey60",
alpha = 0.3) +
geom_point(size = 2, alpha = 0.5, color = "darkorange") +
labs(x = "Average Touch Time (sec)",
y = "Field Goal %") +
theme_classic()
## `geom_smooth()` using formula 'y ~ x'

cor(summary_data$avg_touch_time,
summary_data$pct)
## [1] -0.2673416
# Touch Time vs. FG %
ggplot(summary_data,
aes(x = avg_defender,
y = pct)) +
geom_smooth(method = lm,
color = "grey60",
alpha = 0.3) +
geom_point(size = 2, alpha = 0.5, color = "darkorange") +
labs(x = "Average Defender Distance (ft)",
y = "Field Goal %") +
theme_classic()
## `geom_smooth()` using formula 'y ~ x'

cor(summary_data$avg_defender,
summary_data$pct)
## [1] -0.2705009
# Shot Clock vs. FG %
ggplot(summary_data,
aes(x = avg_clock,
y = pct)) +
geom_smooth(method = lm,
color = "grey60",
alpha = 0.3) +
geom_point(size = 2, alpha = 0.5, color = "darkorange") +
labs(x = "Average Defender Distance (ft)",
y = "Field Goal %") +
theme_classic()
## `geom_smooth()` using formula 'y ~ x'

cor(summary_data$avg_clock,
summary_data$pct)
## [1] 0.05299949
League-wide Shot Chart
ggplot(data,
aes(x = x,
y = y)) +
geom_jitter(alpha = 0.3, aes(color = shot_outcome)) +
scale_color_manual(name = " ",
labels = c("Miss", "Make"),
values = c("0" = "palevioletred2", "1" = "deepskyblue")) +
ggtitle("NBA Shot Chart") +
theme_classic()

League-wide Density
ggplot(data,
aes(x = x,
y = y)) +
stat_density_2d(aes(fill = ..density..), geom = "raster", contour = FALSE) +
scale_fill_distiller(palette = "Spectral", direction = -1) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
theme(legend.position='none') +
ggtitle("NBA Density Plot") +
theme_classic()

Covariate Raster
data_short <- data %>%
dplyr::select(shot_distance, dribbles,
defender_distance, shot_clock, shot_made_flag)
pts <- SpatialPoints(c(data[ ,1], data[ ,2]))
spData <- SpatialPointsDataFrame(pts, data = data_short)
r <- raster(spData)
res(r) <- 5
f <- rasterize(spData, r)
plot(f)

Logistic Model
model <- glm(shot_made_flag ~ shot_distance + dribbles + defender_distance + shot_clock,
data = data, family = "binomial")
summary(model)
##
## Call:
## glm(formula = shot_made_flag ~ shot_distance + dribbles + defender_distance +
## shot_clock, family = "binomial", data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3369 -1.0681 -0.8601 1.1415 1.9625
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.3064537 0.0086491 35.432 <2e-16 ***
## shot_distance -0.0444108 0.0005180 -85.728 <2e-16 ***
## dribbles -0.0424913 0.0018655 -22.777 <2e-16 ***
## defender_distance 0.0384715 0.0020848 18.454 <2e-16 ***
## shot_clock 0.0006611 0.0007793 0.848 0.396
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 273991 on 198922 degrees of freedom
## Residual deviance: 265569 on 198918 degrees of freedom
## AIC: 265579
##
## Number of Fisher Scoring iterations: 4
kable(summary(model)$coef, digits = c(3, 3, 3, 4),
"latex", booktabs=T)